#!/usr/bin/env bash
# Preprocess the Open-Reasoner-Zero JSON dataset with preprocess_data.py.
#
# Run this from the directory that contains preprocess_data.py, after
# adjusting the hard-coded paths below to your environment.

# Adjust the path to set_env.sh to match your actual environment.
source set_env.sh

# Output location for the preprocessed data; also passed as --output-prefix.
# NOTE(review): the value ends with '/', so the prefix is the directory itself —
# confirm this matches the file naming expected by preprocess_data.py.
MG_DATASET_PATH=/home/ma-user/work/dataset/mg_data-new/open-reasoner-zero/
# Stop early if the output directory cannot be created instead of launching
# the preprocessing job against a missing path.
mkdir -p -- "$MG_DATASET_PATH" || exit 1

# --map-keys below is a dataset-specific key mapping. The original comments
# describe --map-keys as optional with a default; presumably that default is
# the Alpaca-style mapping shown here — confirm against preprocess_data.py:
#   --map-keys '{"prompt":"instruction","query":"input","response":"output"}'
python preprocess_data.py \
  --input /home/ma-user/work/users/qzh/data/Open-Reasoner-Zero/orz_math_57k_collected_2.json \
  --tokenizer-name-or-path /home/ma-user/work/models/hf_models/Qwen/Qwen2.5-32B/ \
  --output-prefix "$MG_DATASET_PATH" \
  --handler-name R1AlpacaStyleInstructionHandler \
  --tokenizer-type PretrainedFromHF \
  --workers 16 \
  --log-interval 1000 \
  --prompt-type open_reasoner_zero \
  --dataset-additional-keys labels \
  --map-keys '{"prompt":"value", "query":"", "response":"ground_truth", "system":""}'